functions
plot_rich_reads_samlenames_lm <- function(physeq, group = "Site", label = "Repeat"){
rish <- estimate_richness(physeq, measures = "Observed")
reads.sum <- as.data.frame(sample_sums(physeq))
reads.summary <- cbind(rish, reads.sum)
colnames(reads.summary) <- c("otus","reads")
reads.summary["Repeat"] <-unlist(purrr::map(stringr::str_split(rownames(physeq@sam_data), "\\.", 2), function(x) x[[2]]))
reads.summary["Site"] <- physeq@sam_data[[group]]
library(ggrepel)
require(ggforce)
p1 <- ggplot(data=reads.summary) +
geom_point(aes(y=otus, x=log2(reads), color=Site),size=3) +
geom_text_repel(aes(y=otus, x=log2(reads), label=paste0(Repeat))) +
theme_bw() +
geom_smooth(aes(y=otus, x=log2(reads), fill=Site, color=Site),method=lm, se=FALSE, ymin = 1) +
scale_x_continuous(sec.axis = sec_axis(sec.axis ~ 2**.))
return(p1)
}
beta_custom_norm_NMDS_elli_w <- function(ps, seed = 7888, normtype="vst", Color="What", Group="Repeat"){
require(phyloseq)
require(ggplot2)
require(ggpubr)
library(ggforce)
ordination.b <- ordinate(ps, "NMDS", "bray")
mds <- as.data.frame(ordination.b$points)
p <- plot_ordination(ps,
ordination.b,
type="sample",
color = Color,
title="NMDS - Bray-Curtis",
# title=NULL,
axes = c(1,2) ) +
theme_bw() +
theme(text = element_text(size = 10)) +
geom_point(size = 3) +
annotate("text",
x=min(mds$MDS1) + abs(min(mds$MDS1))/7,
y=max(mds$MDS2),
label=paste0("Stress -- ", round(ordination.b$stress, 3))) +
geom_mark_ellipse(aes_string(group = Group, label = Group),
label.fontsize = 10,
label.buffer = unit(2, "mm"),
label.minwidth = unit(5, "mm"),
con.cap = unit(0.1, "mm"),
con.colour='gray') +
theme(legend.position = "none") +
ggplot2::scale_fill_viridis_c(option = "H")
return(p)
}
plot_alpha_w_toc <- function(ps, group, metric) {
require(phyloseq)
require(ggplot2)
ps_a <-c
er <- estimate_richness(ps_a)
df_er <- cbind(ps_a@sam_data, er)
df_er <- df_er %>% select(c(group, metric))
stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
rstatix::tukey_hsd()
y <- seq(max(er[[metric]]), length=length(stat.test$p.adj), by=max(er[[metric]]/20))
plot_richness(ps_a, x=group, measures=metric) +
geom_boxplot() +
geom_point(size=1.2, alpha=0.3) +
ggpubr::stat_pvalue_manual(
stat.test,
label = "p.adj.signif",
y.position = y) +
theme_light() +
scale_color_brewer(palette="Dark2") +
theme(axis.text.x = element_text(angle = 90),
axis.title.x=element_blank()) +
labs(y=paste(metric, "index"))
}
phyloseq_to_ampvis2 <- function(physeq) {
#check object for class
if(!any(class(physeq) %in% "phyloseq"))
stop("physeq object must be of class \"phyloseq\"", call. = FALSE)
#ampvis2 requires taxonomy and abundance table, phyloseq checks for the latter
if(is.null(physeq@tax_table))
stop("No taxonomy found in the phyloseq object and is required for ampvis2", call. = FALSE)
#OTUs must be in rows, not columns
if(phyloseq::taxa_are_rows(physeq))
abund <- as.data.frame(phyloseq::otu_table(physeq)@.Data)
else
abund <- as.data.frame(t(phyloseq::otu_table(physeq)@.Data))
#tax_table is assumed to have OTUs in rows too
tax <- phyloseq::tax_table(physeq)@.Data
#merge by rownames (OTUs)
otutable <- merge(
abund,
tax,
by = 0,
all.x = TRUE,
all.y = FALSE,
sort = FALSE
)
colnames(otutable)[1] <- "OTU"
#extract sample_data (metadata)
if(!is.null(physeq@sam_data)) {
metadata <- data.frame(
phyloseq::sample_data(physeq),
row.names = phyloseq::sample_names(physeq),
stringsAsFactors = FALSE,
check.names = FALSE
)
#check if any columns match exactly with rownames
#if none matched assume row names are sample identifiers
samplesCol <- unlist(lapply(metadata, function(x) {
identical(x, rownames(metadata))}))
if(any(samplesCol)) {
#error if a column matched and it's not the first
if(!samplesCol[[1]])
stop("Sample ID's must be in the first column in the sample metadata, please reorder", call. = FALSE)
} else {
#assume rownames are sample identifiers, merge at the end with name "SampleID"
if(any(colnames(metadata) %in% "SampleID"))
stop("A column in the sample metadata is already named \"SampleID\" but does not seem to contain sample ID's", call. = FALSE)
metadata$SampleID <- rownames(metadata)
#reorder columns so SampleID is the first
metadata <- metadata[, c(which(colnames(metadata) %in% "SampleID"), 1:(ncol(metadata)-1L)), drop = FALSE]
}
} else
metadata <- NULL
#extract phylogenetic tree, assumed to be of class "phylo"
if(!is.null(physeq@phy_tree)) {
tree <- phyloseq::phy_tree(physeq)
} else
tree <- NULL
#extract OTU DNA sequences, assumed to be of class "XStringSet"
if(!is.null(physeq@refseq)) {
#convert XStringSet to DNAbin using a temporary file (easiest)
fastaTempFile <- tempfile(pattern = "ampvis2_", fileext = ".fa")
Biostrings::writeXStringSet(physeq@refseq, filepath = fastaTempFile)
} else
fastaTempFile <- NULL
#load as normally with amp_load
ampvis2::amp_load(
otutable = otutable,
metadata = metadata,
tree = tree,
fasta = fastaTempFile
)
}
filter_chrmitnas <- function(physeq){
ps_object <- physeq
ps_object <- subset_taxa(ps_object, Phylum != "NA")
# ps_object@tax_table[is.na(ps_object@tax_table)] <- TRUEps@tax_table@.Data
ps_object <- subset_taxa(ps_object,
!(Family == "Mitochondria" |
Class == "Chloroplast" |
Order == "Chloroplast"))
# ps_object@tax_table <- dplyr::na_if(ps_object@tax_table, TRUE)
ps.f <- ps_object
out_old <- capture.output(physeq)[4] %>%
str_extract(regex("([1-9]).*(?= by)"))
out_new <- capture.output(ps.f)[4] %>%
str_extract(regex("([1-9]).*(?= by)"))
message(paste0("old = ", out_old))
message(paste0("filtered = ", out_new))
return(ps.f)
}
lsf.str()
## beta_custom_norm_NMDS_elli_w : function (ps, seed = 7888, normtype = "vst", Color = "What", Group = "Repeat")
## filter_chrmitnas : function (physeq)
## phyloseq_to_ampvis2 : function (physeq)
## plot_alpha_w_toc : function (ps, group, metric)
## plot_rich_reads_samlenames_lm : function (physeq, group = "Site", label = "Repeat")
ps.f <- filter_chrmitnas(ps)
## old = 15650 taxa
## filtered = 10719 taxa
plot_rich_reads_samlenames_lm(ps.f, group = "Site", label = "Sample")
## Loading required package: ggforce
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 46 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ps.ff <- prune_samples(!sample_names(ps.f) %in% paste0("Andronov.SEQ130.",c(34, 5, 15)), ps.f)
ps.ff <- prune_taxa(taxa_sums(ps.ff) > 0, ps.ff)
plot_rich_reads_samlenames_lm(ps.ff, group = "Site", label = "Repeat")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

beta_custom_norm_NMDS_elli_w(ps.ff, Color = "Site", Group = "Site")
## Loading required package: ggpubr
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.040883
## Run 1 stress 0.04088291
## ... New best solution
## ... Procrustes: rmse 0.0001136501 max resid 0.0006866252
## ... Similar to previous best
## Run 2 stress 0.04088302
## ... Procrustes: rmse 0.0001093968 max resid 0.0006465295
## ... Similar to previous best
## Run 3 stress 0.04088292
## ... Procrustes: rmse 0.00007347523 max resid 0.0004436709
## ... Similar to previous best
## Run 4 stress 0.04088297
## ... Procrustes: rmse 0.00009644391 max resid 0.000579812
## ... Similar to previous best
## Run 5 stress 0.040883
## ... Procrustes: rmse 0.00005250913 max resid 0.0002992113
## ... Similar to previous best
## Run 6 stress 0.04088288
## ... New best solution
## ... Procrustes: rmse 0.00005536902 max resid 0.0003120867
## ... Similar to previous best
## Run 7 stress 0.04088294
## ... Procrustes: rmse 0.00003866784 max resid 0.0002017634
## ... Similar to previous best
## Run 8 stress 0.04088305
## ... Procrustes: rmse 0.00007407182 max resid 0.0004018779
## ... Similar to previous best
## Run 9 stress 0.04088291
## ... Procrustes: rmse 0.00001937751 max resid 0.00009890726
## ... Similar to previous best
## Run 10 stress 0.0408829
## ... Procrustes: rmse 0.0000134097 max resid 0.0000675259
## ... Similar to previous best
## Run 11 stress 0.04088293
## ... Procrustes: rmse 0.00003389185 max resid 0.0001990864
## ... Similar to previous best
## Run 12 stress 0.04088291
## ... Procrustes: rmse 0.00002012836 max resid 0.0001021024
## ... Similar to previous best
## Run 13 stress 0.040883
## ... Procrustes: rmse 0.00006382984 max resid 0.0003486531
## ... Similar to previous best
## Run 14 stress 0.04088291
## ... Procrustes: rmse 0.00001481653 max resid 0.00006931742
## ... Similar to previous best
## Run 15 stress 0.04088301
## ... Procrustes: rmse 0.00006630298 max resid 0.0003419541
## ... Similar to previous best
## Run 16 stress 0.04088302
## ... Procrustes: rmse 0.00005111931 max resid 0.0002736245
## ... Similar to previous best
## Run 17 stress 0.04088287
## ... New best solution
## ... Procrustes: rmse 0.00002953964 max resid 0.0001524292
## ... Similar to previous best
## Run 18 stress 0.04088306
## ... Procrustes: rmse 0.0001135854 max resid 0.000592004
## ... Similar to previous best
## Run 19 stress 0.04088294
## ... Procrustes: rmse 0.00005696751 max resid 0.0002208853
## ... Similar to previous best
## Run 20 stress 0.04088291
## ... Procrustes: rmse 0.00004956004 max resid 0.0002547419
## ... Similar to previous best
## *** Best solution repeated 4 times
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation ideoms with `aes()`

ps.r <- rarefy_even_depth(ps.ff,rngseed = 1221)
## `set.seed(1221)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1221); .Random.seed` for the full vector
## ...
## 421OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
ps.r
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10275 taxa and 51 samples ]
## sample_data() Sample Data: [ 51 samples by 9 sample variables ]
## tax_table() Taxonomy Table: [ 10275 taxa by 7 taxonomic ranks ]
## refseq() DNAStringSet: [ 10275 reference sequences ]
beta_custom_norm_NMDS_elli_w(ps.r, Color = "Site", Group = "Site")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03281294
## Run 1 stress 0.03281294
## ... New best solution
## ... Procrustes: rmse 0.00003300971 max resid 0.0001301202
## ... Similar to previous best
## Run 2 stress 0.03281293
## ... New best solution
## ... Procrustes: rmse 0.000001794148 max resid 0.00000636754
## ... Similar to previous best
## Run 3 stress 0.03281305
## ... Procrustes: rmse 0.00006464719 max resid 0.0002555404
## ... Similar to previous best
## Run 4 stress 0.03281293
## ... New best solution
## ... Procrustes: rmse 0.000003331408 max resid 0.00001223961
## ... Similar to previous best
## Run 5 stress 0.03281296
## ... Procrustes: rmse 0.00002995194 max resid 0.0001188467
## ... Similar to previous best
## Run 6 stress 0.03281301
## ... Procrustes: rmse 0.00005133915 max resid 0.0002033569
## ... Similar to previous best
## Run 7 stress 0.032813
## ... Procrustes: rmse 0.00005186784 max resid 0.0002055802
## ... Similar to previous best
## Run 8 stress 0.03281301
## ... Procrustes: rmse 0.00005504832 max resid 0.0002181154
## ... Similar to previous best
## Run 9 stress 0.03281294
## ... Procrustes: rmse 0.00001107934 max resid 0.00004250849
## ... Similar to previous best
## Run 10 stress 0.03281294
## ... Procrustes: rmse 0.00000897434 max resid 0.00002738668
## ... Similar to previous best
## Run 11 stress 0.03281301
## ... Procrustes: rmse 0.00004782254 max resid 0.0001885485
## ... Similar to previous best
## Run 12 stress 0.03281304
## ... Procrustes: rmse 0.00006446968 max resid 0.0002554987
## ... Similar to previous best
## Run 13 stress 0.03281293
## ... New best solution
## ... Procrustes: rmse 0.00001661198 max resid 0.00006464267
## ... Similar to previous best
## Run 14 stress 0.03281305
## ... Procrustes: rmse 0.00008591437 max resid 0.0003396772
## ... Similar to previous best
## Run 15 stress 0.03281303
## ... Procrustes: rmse 0.00007177757 max resid 0.000282788
## ... Similar to previous best
## Run 16 stress 0.03281304
## ... Procrustes: rmse 0.0000820854 max resid 0.0003245479
## ... Similar to previous best
## Run 17 stress 0.03281295
## ... Procrustes: rmse 0.00003182524 max resid 0.0001257578
## ... Similar to previous best
## Run 18 stress 0.03281302
## ... Procrustes: rmse 0.00007563761 max resid 0.0002991288
## ... Similar to previous best
## Run 19 stress 0.03281297
## ... Procrustes: rmse 0.00004755598 max resid 0.0001879429
## ... Similar to previous best
## Run 20 stress 0.03281294
## ... Procrustes: rmse 0.00002613964 max resid 0.0001031611
## ... Similar to previous best
## *** Best solution repeated 8 times

ps.merged <- merge_samples(ps.ff, "Repeat")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
d_meta <- ps.merged@sam_data %>%
data.frame() %>%
select(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro")) %>%
scale() %>%
as.data.frame()
d_otu <- phyloseq::distance(ps.merged, "bray")
dec <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = TRUE )
sort <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = FALSE )
f_dec <- as.formula(paste0("d_otu ~", noquote(paste(dec, collapse=" + "))))
f_sort <- as.formula(paste0("d_otu ~", noquote(paste(sort, collapse=" + "))))
list(
order_forward = vegan::adonis2(f_dec, data = d_meta),
order_backward = vegan::adonis2(f_sort, data = d_meta)
)
## $order_forward
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = f_dec, data = d_meta)
## Df SumOfSqs R2 F Pr(>F)
## pH.salt 1 0.77383 0.46702 9.8349 0.001 ***
## Na.water 1 0.34096 0.20578 4.3334 0.022 *
## K.water 1 0.13181 0.07955 1.6752 0.271
## Electro 1 0.07642 0.04612 0.9713 0.495
## Cl 1 0.11950 0.07212 1.5187 0.259
## C.total 1 0.05705 0.03443 0.7251 0.616
## Residual 2 0.15736 0.09497
## Total 8 1.65694 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $order_backward
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = f_sort, data = d_meta)
## Df SumOfSqs R2 F Pr(>F)
## C.total 1 0.69264 0.41802 8.8030 0.001 ***
## Cl 1 0.31875 0.19237 4.0511 0.020 *
## Electro 1 0.13994 0.08446 1.7785 0.202
## K.water 1 0.16173 0.09761 2.0554 0.151
## Na.water 1 0.07581 0.04575 0.9634 0.500
## pH.salt 1 0.11072 0.06682 1.4072 0.293
## Residual 2 0.15736 0.09497
## Total 8 1.65694 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ps.ff@sam_data %>%
data.frame() %>%
# rownames_to_column("shit") %>%
# column_to_rownames("Name") %>%
select(which(sapply(., is.numeric)), Site) %>%
group_by(Site) %>%
summarise(across(everything(), mean),
.groups = 'drop') %>%
column_to_rownames("Site") %>%
scale() %>%
heatmaply::ggheatmap()
## Registered S3 method overwritten by 'dendextend':
## method from
## rev.hclust vegan
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust vegan

library(ggrepel)
ps.merged <- phyloseq::merge_samples(ps.ff, "Repeat")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
physeq <- ps.ff
veganifyOTU <- function(physeq){
require(phyloseq)
if(taxa_are_rows(physeq)){physeq <- t(physeq)}
return(as(otu_table(physeq), "matrix"))
}
otus.ps.vegan <- veganifyOTU(physeq)
rownames(otus.ps.vegan) <- ps.ff@sam_data$Repeat
metadata <- physeq@sam_data %>%
data.frame() %>%
select(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"))
dec <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = TRUE )
sort <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = FALSE )
f_dec <- as.formula(paste0("otus.ps.vegan ~", noquote(paste(dec, collapse=" + "))))
f_sort <- as.formula(paste0("otus.ps.vegan ~", noquote(paste(sort, collapse=" + "))))
cca_dec <- vegan::cca(f_dec, data=metadata)
cca_sort <- vegan::cca(f_sort, data=metadata)
kate.ggcca.sites <- function(vare.cca){
library(tidyverse)
biplot <- as.data.frame(vare.cca$CCA$biplot)
wa <- as.data.frame(vare.cca$CCA$wa)
biplot <- rownames_to_column(biplot, "Label") %>%
add_column(Score = rep("biplot", length(rownames(biplot))))
wa <- rownames_to_column(wa, "Label") %>%
add_column(Score = rep("sites", length(rownames(wa))))
fdat_amazing <- rbind(biplot, wa)
fdat_amazing <- fdat_amazing %>%
mutate(label_size = ifelse(Score == "biplot", 4.5, 3))
p <- ggplot(fdat_amazing %>% filter(Score %in% c("sites","biplot"))) +
# geom_abline(intercept=seq(-100, 100, 25), slope=1, colour="grey", alpha=0.5) +
# geom_abline(intercept=seq(-100, 100, 25), slope=-1, colour="grey", alpha=0.5) +
geom_vline(xintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
geom_hline(yintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
geom_point(data = fdat_amazing %>% dplyr::filter(Score == "sites"), mapping = aes(x=CCA1, y=CCA2, colour = factor(Score))) +
geom_segment(data = fdat_amazing %>% dplyr::filter(Score == "biplot"), aes(x = 0, xend = CCA1, y = 0, yend = CCA2),
size = 0.6, alpha=0.8, color = "red",arrow = arrow(angle = 3)) +
geom_text_repel(aes(x=CCA1, y=CCA2, label= Label),size=fdat_amazing$label_size) +
xlab(paste0(round(vare.cca$CCA$eig[1], 3)*100, "% CCA1")) +
ylab(paste0(round(vare.cca$CCA$eig[2], 3)*100, "% CCA2")) +
grids(linetype = "dashed") +
theme(legend.position = "none", panel.background = element_rect(fill = "white", colour = "grey50"))
return(p)
}
cca.sites <- kate.ggcca.sites(cca_dec)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
cca.sites
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

vegan::vif.cca(cca_dec)
## pH.salt Na.water K.water Electro Cl C.total
## 12.164455 27.848814 1.678713 191.952968 215.325139 30.290554
library(ggrepel)
ps.fff <- phyloseq::filter_taxa(ps.ff, function(x) sum(x > 3) > (0.1*length(x)), TRUE)
# ps.varstab <- ps_vst(ps.fff, "type")
ps.varstab <- ps.ff
# ps.varstab.merged <- phyloseq::merge_samples(ps.varstab, "Repeat")
# ps.varstab.merged.family <- tax_glom(ps.fff, taxrank="Family")
physeq <- ps.fff
ps.varstab <- physeq
veganifyOTU <- function(physeq){
require(phyloseq)
if(taxa_are_rows(physeq)){physeq <- t(physeq)}
return(as(otu_table(physeq), "matrix"))
}
otus.ps.vegan <- veganifyOTU(physeq)
metadata <- as(sample_data(physeq), "data.frame")
cca_w_varstab_asv <- vegan::cca(f_dec, data=metadata)
wa <- as.data.frame(cca_w_varstab_asv$CCA$v) %>%
rownames_to_column("ID")
taxa.pruned <- as.data.frame(ps.varstab@tax_table@.Data) %>%
rownames_to_column("ID")
taxa.pruned <- taxa.pruned %>%
mutate_all(as.character)
# old.tips <- tree$tip.label
# matches <- regmatches(tree$tip.label, gregexpr("[[:digit:]]+", tree$tip.label))
# taxa.pruned$number <- as.numeric(unlist(matches))
# Shitty taxa formating part
taxa.pruned$taxa <- ifelse(is.na(taxa.pruned$Genus),
ifelse(is.na(taxa.pruned$Family),
ifelse(is.na(taxa.pruned$Order),
ifelse(is.na(taxa.pruned$Class), taxa.pruned$Phylum, taxa.pruned$Class) , taxa.pruned$Order), taxa.pruned$Family), taxa.pruned$Genus)
taxa.pruned[taxa.pruned == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
taxa.pruned[taxa.pruned == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Pararhizobium"
taxa.pruned$taxa2 <- ifelse(is.na(taxa.pruned$Species),
with(taxa.pruned, paste0(taxa)),
with(taxa.pruned, paste0(taxa, " ", Species )))
taxa.pruned$phylum <- ifelse(taxa.pruned$Phylum == "Proteobacteria", with(taxa.pruned, paste0(taxa.pruned$Class)), with(taxa.pruned, paste0(taxa.pruned$Phylum)))
taxa.pruned$Label <- paste0(taxa.pruned$ID, "_", taxa.pruned$taxa2)
wa <- full_join(wa, taxa.pruned, by="ID") %>%
mutate(Score = "sites")
#For the top 50 most abundant taxa, skip if use des res
head_asv <- ps.varstab@otu_table@.Data %>%
data.frame %>%
colSums() %>%
sort(decreasing = TRUE) %>%
head(n=100) %>%
names()
wa <- filter(wa, ID %in% head_asv)
# maybe only different by des? Picture will be nice.
# wa <- filter(wa, ID %in% row.des)
# wa <- filter(wa, ID %in% row.des.so)
biplot <- as.data.frame(cca_w_varstab_asv$CCA$biplot)
biplot <- rownames_to_column(biplot, "Label") %>%
add_column(Score = rep("biplot", length(rownames(biplot))))
fdat_amazing <- plyr::rbind.fill(biplot, wa) %>%
mutate(Label = as.factor(Label))
fdat_amazing <- fdat_amazing %>%
mutate(label_size = ifelse(Score == "biplot", 4.5, 3))
p.cca.species <- ggplot(fdat_amazing %>% filter(Score %in% c("sites","biplot"))) +
geom_point(data = fdat_amazing %>% dplyr::filter(Score == "sites"), mapping = aes(x=CCA1, y=CCA2)) +
geom_segment(data = fdat_amazing %>% dplyr::filter(Score == "biplot"), aes(x = 0, xend = CCA1, y = 0, yend = CCA2), alpha=0.8, color = "red", arrow = arrow(angle = 3)) +
geom_text_repel(aes(x=CCA1, y=CCA2, label= Label, colour = phylum), size=fdat_amazing$label_size) +
xlab(paste0(round(cca_w_varstab_asv$CCA$eig[1], 3)*100, "% CCA1")) +
ylab(paste0(round(cca_w_varstab_asv$CCA$eig[2], 3)*100, "% CCA2")) +
grids(linetype = "dashed") +
geom_vline(xintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
geom_hline(yintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
theme(legend.position = "top", panel.background = element_rect(fill = "white", colour = "grey50"))
p.cca.species
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

amp <- phyloseq_to_ampvis2(ps.ff)
amp_heatmap(amp,
tax_show = 22,
group_by = "Site",
tax_aggregate = "Phylum",
tax_class = "Proteobacteria",
normalise=TRUE,
showRemainingTaxa = TRUE)

ps.inall <- phyloseq::filter_taxa(ps.ff, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
amp.inall <- phyloseq_to_ampvis2(ps.inall)
amp_heatmap(amp.inall,
tax_show = 40,
group_by = "Site",
tax_aggregate = "OTU",
tax_add = "Genus",
normalise=TRUE,
showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

ps.exl <- phyloseq::filter_taxa(ps.ff, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
ps.per <- phyloseq::transform_sample_counts(ps.ff, function(x) x / sum(x) * 100)
ps.exl.taxa <- taxa_names(ps.exl)
ps.per.exl <- prune_taxa(ps.exl.taxa, ps.per)
amp.exl.r <- phyloseq_to_ampvis2(ps.per.exl)
amp_heatmap(amp.exl.r, tax_show = 60,
group_by = "Site",
tax_aggregate = "OTU",
tax_add = "Genus",
round = 2,
normalise=FALSE,
showRemainingTaxa = FALSE)
## Warning: Transformation introduced infinite values in discrete y-axis

# ancom_da_glob <- ANCOMBC::ancombc(phyloseq = ps.inall,
# formula = "Site",
# p_adj_method = "fdr",
# lib_cut = 1000,
# group = "Site",
# struc_zero = TRUE,
# neg_lb = TRUE,
# tol = 1e-5,
# max_iter = 100,
# conserve = TRUE,
# alpha = 0.05,
# global = TRUE)
set.seed(123)
ancom <- ANCOMBC::ancombc2(data = ps.inall,
tax_level = NULL,
fix_formula = "Site + Electro + pH.salt",
rand_formula = NULL,
p_adj_method = "fdr",
pseudo = 0,
pseudo_sens = TRUE,
prv_cut = 0.10,
lib_cut = 1000,
s0_perc = 0.05,
group = "Site",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05,
n_cl = 10,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = TRUE,
trend = FALSE,
iter_control = list(tol = 1e-2, max_iter = 20, verbose = TRUE),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
nrow = 2,
byrow = TRUE),
matrix(c(-1, 0, 1, -1),
nrow = 2,
byrow = TRUE)),
node = list(2, 2),
solver = "ECOS",
B = 100)
)
## Registered S3 methods overwritten by 'proxy':
## method from
## print.registry_field registry
## print.registry_entry registry
## `tax_level` is not speficified
## No agglomeration will be performed
## Otherwise, please speficy `tax_level` by one of the following:
## Kingdom, Phylum, Class, Order, Family, Genus, Species
## Obtaining initial estimates ...
## ML iteration = 1, epsilon = 1.3
## ML iteration = 2, epsilon = 8.3e-14
## Estimating sample-specific biases ...
## ANCOM-BC2 primary results ...
## The sensitivity analysis for the pseudo-count addition ...
## ANCOM-BC2 global test ...
## ANCOM-BC2 pairwise directional test ...
## ANCOM-BC2 Dunnet's type of test ...
samp_frac = ancom$samp_frac
# Replace NA with 0
samp_frac[is.na(samp_frac)] = 0
# Add pesudo-count (1) to avoid taking the log of 0
log_obs_abn = log(ancom$feature_table + 1)
# Adjust the log observed abundances
log_corr_abn = t(t(log_obs_abn) - samp_frac)
# Show the first 6 samples
round(log_corr_abn[, 1:6], 2) %>%
DT::datatable(caption = "Bias-corrected log observed abundances")
ancom$res_pair %>%
DT::datatable(caption = "Bias-corrected log observed abundances")
sum <- log_corr_abn %>%
as.data.frame() %>%
rowSums()
log_corr_abn %>%
t() %>%
as.data.frame() %>%
length()
## [1] 532
ps.an <- prune_taxa(taxa_names(ps.ff) %in% rownames(log_corr_abn), ps.ff)
otu_table(ps.an) <- otu_table(t(log_corr_abn), taxa_are_rows = FALSE)
ps.an <- merge_samples(ps.an, "Site", fun = mean)
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
mean.d <- ps.an@otu_table %>%
t() %>%
data.frame() %>%
mutate(
lfc_Sitehigh_road = (Ashan + high_road)/2,
lfc_SiteMiddle = (Ashan + Middle)/2,
lfc_SiteMiddle_Sitehigh_road = (Middle + high_road)/2
) %>%
rownames_to_column("taxon") %>%
select(c("taxon","lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road")) %>%
pivot_longer(!taxon, names_to = "pair", values_to = "mean")
sum.d <- data.frame(taxon = names(sum), sum = sum, row.names = NULL)
lfc <- ancom$res_pair %>%
select(c("taxon","lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road"))
data_for_plot <- ancom$res_pair %>%
select(c("taxon","lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road")) %>%
pivot_longer(c("lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road"), names_to = "pair") %>%
left_join(mean.d, by=c("taxon", "pair"))
data_for_plot
## # A tibble: 1,596 × 4
## taxon pair value mean
## <chr> <chr> <dbl> <dbl>
## 1 Seq1 lfc_Sitehigh_road -2.52 99.6
## 2 Seq1 lfc_SiteMiddle -3.31 89.9
## 3 Seq1 lfc_SiteMiddle_Sitehigh_road -0.788 108.
## 4 Seq2 lfc_Sitehigh_road 0.681 88.3
## 5 Seq2 lfc_SiteMiddle 0.664 80.3
## 6 Seq2 lfc_SiteMiddle_Sitehigh_road -0.0171 109.
## 7 Seq3 lfc_Sitehigh_road -2.65 88.5
## 8 Seq3 lfc_SiteMiddle -2.20 84.0
## 9 Seq3 lfc_SiteMiddle_Sitehigh_road 0.459 107.
## 10 Seq5 lfc_Sitehigh_road -1.77 84.4
## # … with 1,586 more rows
data_for_plot %>%
ggplot(aes(x=mean, y=value, label=taxon, color=pair)) +
geom_point() +
geom_smooth(aes(y=value, x=mean, fill=pair, color=pair),
method=lm,
method.args = list(family = "quasipoisson"),
se=TRUE) +
theme_bw() +
facet_wrap(~pair)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: In lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
## ...) :
## extra argument 'family' will be disregarded
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: In lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
## ...) :
## extra argument 'family' will be disregarded
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: In lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
## ...) :
## extra argument 'family' will be disregarded
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

l <- lm(value ~ mean*pair, data=data_for_plot)
summary(l)
##
## Call:
## lm(formula = value ~ mean * pair, data = data_for_plot)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2166 -1.2223 0.0452 1.3728 7.6588
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.347724 0.401319 3.358 0.000803
## mean -0.042755 0.007622 -5.609 0.0000000239
## pairlfc_SiteMiddle -0.188361 0.531089 -0.355 0.722885
## pairlfc_SiteMiddle_Sitehigh_road -1.592645 0.530068 -3.005 0.002701
## mean:pairlfc_SiteMiddle -0.010654 0.010575 -1.007 0.313867
## mean:pairlfc_SiteMiddle_Sitehigh_road 0.039059 0.009855 3.963 0.0000771604
##
## (Intercept) ***
## mean ***
## pairlfc_SiteMiddle
## pairlfc_SiteMiddle_Sitehigh_road **
## mean:pairlfc_SiteMiddle
## mean:pairlfc_SiteMiddle_Sitehigh_road ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.084 on 1590 degrees of freedom
## Multiple R-squared: 0.07491, Adjusted R-squared: 0.072
## F-statistic: 25.75 on 5 and 1590 DF, p-value: < 2.2e-16